{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#导入数据\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "#查看数据分布是否对称\n",
    "from scipy.stats import skew\n",
    "#可视化\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from IPython.display import display\n",
    "# Definitions\n",
    "pd.set_option('display.float_format', lambda x: '%.3f' % x)\n",
    "%matplotlib inline\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "读入数据,数据预处理对训练数据和测试数据需进行同样处理，因此将二者一起读入\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instant</th>\n",
       "      <th>dteday</th>\n",
       "      <th>season</th>\n",
       "      <th>yr</th>\n",
       "      <th>mnth</th>\n",
       "      <th>holiday</th>\n",
       "      <th>weekday</th>\n",
       "      <th>workingday</th>\n",
       "      <th>weathersit</th>\n",
       "      <th>temp</th>\n",
       "      <th>atemp</th>\n",
       "      <th>hum</th>\n",
       "      <th>windspeed</th>\n",
       "      <th>casual</th>\n",
       "      <th>registered</th>\n",
       "      <th>cnt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2011-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.344</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.806</td>\n",
       "      <td>0.160</td>\n",
       "      <td>331</td>\n",
       "      <td>654</td>\n",
       "      <td>985</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>2011-01-02</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.363</td>\n",
       "      <td>0.354</td>\n",
       "      <td>0.696</td>\n",
       "      <td>0.249</td>\n",
       "      <td>131</td>\n",
       "      <td>670</td>\n",
       "      <td>801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>2011-01-03</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.196</td>\n",
       "      <td>0.189</td>\n",
       "      <td>0.437</td>\n",
       "      <td>0.248</td>\n",
       "      <td>120</td>\n",
       "      <td>1229</td>\n",
       "      <td>1349</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>2011-01-04</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.200</td>\n",
       "      <td>0.212</td>\n",
       "      <td>0.590</td>\n",
       "      <td>0.160</td>\n",
       "      <td>108</td>\n",
       "      <td>1454</td>\n",
       "      <td>1562</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>2011-01-05</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.227</td>\n",
       "      <td>0.229</td>\n",
       "      <td>0.437</td>\n",
       "      <td>0.187</td>\n",
       "      <td>82</td>\n",
       "      <td>1518</td>\n",
       "      <td>1600</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   instant      dteday  season  yr  mnth  holiday  weekday  workingday  \\\n",
       "0        1  2011-01-01       1   0     1        0        6           0   \n",
       "1        2  2011-01-02       1   0     1        0        0           0   \n",
       "2        3  2011-01-03       1   0     1        0        1           1   \n",
       "3        4  2011-01-04       1   0     1        0        2           1   \n",
       "4        5  2011-01-05       1   0     1        0        3           1   \n",
       "\n",
       "   weathersit  temp  atemp   hum  windspeed  casual  registered   cnt  \n",
       "0           2 0.344  0.364 0.806      0.160     331         654   985  \n",
       "1           2 0.363  0.354 0.696      0.249     131         670   801  \n",
       "2           1 0.196  0.189 0.437      0.248     120        1229  1349  \n",
       "3           1 0.200  0.212 0.590      0.160     108        1454  1562  \n",
       "4           1 0.227  0.229 0.437      0.187      82        1518  1600  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data=pd.read_csv('day.csv')\n",
    "data.head()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "数据探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 731 entries, 0 to 730\n",
      "Data columns (total 16 columns):\n",
      "instant       731 non-null int64\n",
      "dteday        731 non-null object\n",
      "season        731 non-null int64\n",
      "yr            731 non-null int64\n",
      "mnth          731 non-null int64\n",
      "holiday       731 non-null int64\n",
      "weekday       731 non-null int64\n",
      "workingday    731 non-null int64\n",
      "weathersit    731 non-null int64\n",
      "temp          731 non-null float64\n",
      "atemp         731 non-null float64\n",
      "hum           731 non-null float64\n",
      "windspeed     731 non-null float64\n",
      "casual        731 non-null int64\n",
      "registered    731 non-null int64\n",
      "cnt           731 non-null int64\n",
      "dtypes: float64(4), int64(11), object(1)\n",
      "memory usage: 91.4+ KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instant</th>\n",
       "      <th>season</th>\n",
       "      <th>yr</th>\n",
       "      <th>mnth</th>\n",
       "      <th>holiday</th>\n",
       "      <th>weekday</th>\n",
       "      <th>workingday</th>\n",
       "      <th>weathersit</th>\n",
       "      <th>temp</th>\n",
       "      <th>atemp</th>\n",
       "      <th>hum</th>\n",
       "      <th>windspeed</th>\n",
       "      <th>casual</th>\n",
       "      <th>registered</th>\n",
       "      <th>cnt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "      <td>731.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>366.000</td>\n",
       "      <td>2.497</td>\n",
       "      <td>0.501</td>\n",
       "      <td>6.520</td>\n",
       "      <td>0.029</td>\n",
       "      <td>2.997</td>\n",
       "      <td>0.684</td>\n",
       "      <td>1.395</td>\n",
       "      <td>0.495</td>\n",
       "      <td>0.474</td>\n",
       "      <td>0.628</td>\n",
       "      <td>0.190</td>\n",
       "      <td>848.176</td>\n",
       "      <td>3656.172</td>\n",
       "      <td>4504.349</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>211.166</td>\n",
       "      <td>1.111</td>\n",
       "      <td>0.500</td>\n",
       "      <td>3.452</td>\n",
       "      <td>0.167</td>\n",
       "      <td>2.005</td>\n",
       "      <td>0.465</td>\n",
       "      <td>0.545</td>\n",
       "      <td>0.183</td>\n",
       "      <td>0.163</td>\n",
       "      <td>0.142</td>\n",
       "      <td>0.077</td>\n",
       "      <td>686.622</td>\n",
       "      <td>1560.256</td>\n",
       "      <td>1937.211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>0.059</td>\n",
       "      <td>0.079</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.022</td>\n",
       "      <td>2.000</td>\n",
       "      <td>20.000</td>\n",
       "      <td>22.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>183.500</td>\n",
       "      <td>2.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>4.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>0.337</td>\n",
       "      <td>0.338</td>\n",
       "      <td>0.520</td>\n",
       "      <td>0.135</td>\n",
       "      <td>315.500</td>\n",
       "      <td>2497.000</td>\n",
       "      <td>3152.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>366.000</td>\n",
       "      <td>3.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>7.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>3.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>0.498</td>\n",
       "      <td>0.487</td>\n",
       "      <td>0.627</td>\n",
       "      <td>0.181</td>\n",
       "      <td>713.000</td>\n",
       "      <td>3662.000</td>\n",
       "      <td>4548.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>548.500</td>\n",
       "      <td>3.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>10.000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>5.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>2.000</td>\n",
       "      <td>0.655</td>\n",
       "      <td>0.609</td>\n",
       "      <td>0.730</td>\n",
       "      <td>0.233</td>\n",
       "      <td>1096.000</td>\n",
       "      <td>4776.500</td>\n",
       "      <td>5956.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>731.000</td>\n",
       "      <td>4.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>12.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>6.000</td>\n",
       "      <td>1.000</td>\n",
       "      <td>3.000</td>\n",
       "      <td>0.862</td>\n",
       "      <td>0.841</td>\n",
       "      <td>0.973</td>\n",
       "      <td>0.507</td>\n",
       "      <td>3410.000</td>\n",
       "      <td>6946.000</td>\n",
       "      <td>8714.000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       instant  season      yr    mnth  holiday  weekday  workingday  \\\n",
       "count  731.000 731.000 731.000 731.000  731.000  731.000     731.000   \n",
       "mean   366.000   2.497   0.501   6.520    0.029    2.997       0.684   \n",
       "std    211.166   1.111   0.500   3.452    0.167    2.005       0.465   \n",
       "min      1.000   1.000   0.000   1.000    0.000    0.000       0.000   \n",
       "25%    183.500   2.000   0.000   4.000    0.000    1.000       0.000   \n",
       "50%    366.000   3.000   1.000   7.000    0.000    3.000       1.000   \n",
       "75%    548.500   3.000   1.000  10.000    0.000    5.000       1.000   \n",
       "max    731.000   4.000   1.000  12.000    1.000    6.000       1.000   \n",
       "\n",
       "       weathersit    temp   atemp     hum  windspeed   casual  registered  \\\n",
       "count     731.000 731.000 731.000 731.000    731.000  731.000     731.000   \n",
       "mean        1.395   0.495   0.474   0.628      0.190  848.176    3656.172   \n",
       "std         0.545   0.183   0.163   0.142      0.077  686.622    1560.256   \n",
       "min         1.000   0.059   0.079   0.000      0.022    2.000      20.000   \n",
       "25%         1.000   0.337   0.338   0.520      0.135  315.500    2497.000   \n",
       "50%         1.000   0.498   0.487   0.627      0.181  713.000    3662.000   \n",
       "75%         2.000   0.655   0.609   0.730      0.233 1096.000    4776.500   \n",
       "max         3.000   0.862   0.841   0.973      0.507 3410.000    6946.000   \n",
       "\n",
       "           cnt  \n",
       "count  731.000  \n",
       "mean  4504.349  \n",
       "std   1937.211  \n",
       "min     22.000  \n",
       "25%   3152.000  \n",
       "50%   4548.000  \n",
       "75%   5956.000  \n",
       "max   8714.000  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "特征工程\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "season 属性取不同值得次数\n",
      "3    188\n",
      "2    184\n",
      "1    181\n",
      "4    178\n",
      "Name: season, dtype: int64\n",
      "\n",
      "mnth 属性取不同值得次数\n",
      "12    62\n",
      "10    62\n",
      "8     62\n",
      "7     62\n",
      "5     62\n",
      "3     62\n",
      "1     62\n",
      "11    60\n",
      "9     60\n",
      "6     60\n",
      "4     60\n",
      "2     57\n",
      "Name: mnth, dtype: int64\n",
      "\n",
      "holiday 属性取不同值得次数\n",
      "0    710\n",
      "1     21\n",
      "Name: holiday, dtype: int64\n",
      "\n",
      "weekday 属性取不同值得次数\n",
      "6    105\n",
      "1    105\n",
      "0    105\n",
      "5    104\n",
      "4    104\n",
      "3    104\n",
      "2    104\n",
      "Name: weekday, dtype: int64\n",
      "\n",
      "workingday 属性取不同值得次数\n",
      "1    500\n",
      "0    231\n",
      "Name: workingday, dtype: int64\n",
      "\n",
      "weathersit 属性取不同值得次数\n",
      "1    463\n",
      "2    247\n",
      "3     21\n",
      "Name: weathersit, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "\n",
    "categories=['season','mnth','holiday','weekday','workingday','weathersit']\n",
    "for c in categories:\n",
    "    print '\\n%s 属性取不同值得次数'%c\n",
    "    print data[c].value_counts()\n",
    "    data[c] = data[c].astype('object')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "对于类型数据进行one_hot编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>season_1</th>\n",
       "      <th>season_2</th>\n",
       "      <th>season_3</th>\n",
       "      <th>season_4</th>\n",
       "      <th>mnth_1</th>\n",
       "      <th>mnth_2</th>\n",
       "      <th>mnth_3</th>\n",
       "      <th>mnth_4</th>\n",
       "      <th>mnth_5</th>\n",
       "      <th>mnth_6</th>\n",
       "      <th>...</th>\n",
       "      <th>weekday_2</th>\n",
       "      <th>weekday_3</th>\n",
       "      <th>weekday_4</th>\n",
       "      <th>weekday_5</th>\n",
       "      <th>weekday_6</th>\n",
       "      <th>workingday_0</th>\n",
       "      <th>workingday_1</th>\n",
       "      <th>weathersit_1</th>\n",
       "      <th>weathersit_2</th>\n",
       "      <th>weathersit_3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 30 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   season_1  season_2  season_3  season_4  mnth_1  mnth_2  mnth_3  mnth_4  \\\n",
       "0         1         0         0         0       1       0       0       0   \n",
       "1         1         0         0         0       1       0       0       0   \n",
       "2         1         0         0         0       1       0       0       0   \n",
       "3         1         0         0         0       1       0       0       0   \n",
       "4         1         0         0         0       1       0       0       0   \n",
       "\n",
       "   mnth_5  mnth_6      ...       weekday_2  weekday_3  weekday_4  weekday_5  \\\n",
       "0       0       0      ...               0          0          0          0   \n",
       "1       0       0      ...               0          0          0          0   \n",
       "2       0       0      ...               0          0          0          0   \n",
       "3       0       0      ...               1          0          0          0   \n",
       "4       0       0      ...               0          1          0          0   \n",
       "\n",
       "   weekday_6  workingday_0  workingday_1  weathersit_1  weathersit_2  \\\n",
       "0          1             1             0             0             1   \n",
       "1          0             1             0             0             1   \n",
       "2          0             0             1             1             0   \n",
       "3          0             0             1             1             0   \n",
       "4          0             0             1             1             0   \n",
       "\n",
       "   weathersit_3  \n",
       "0             0  \n",
       "1             0  \n",
       "2             0  \n",
       "3             0  \n",
       "4             0  \n",
       "\n",
       "[5 rows x 30 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    " t_data1 = data[categories]\n",
    " t_data1 = pd.get_dummies(t_data1)\n",
    " t_data1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>temp</th>\n",
       "      <th>atemp</th>\n",
       "      <th>hum</th>\n",
       "      <th>windspeed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.355</td>\n",
       "      <td>0.374</td>\n",
       "      <td>0.829</td>\n",
       "      <td>0.285</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.379</td>\n",
       "      <td>0.361</td>\n",
       "      <td>0.716</td>\n",
       "      <td>0.466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.171</td>\n",
       "      <td>0.145</td>\n",
       "      <td>0.450</td>\n",
       "      <td>0.466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.176</td>\n",
       "      <td>0.175</td>\n",
       "      <td>0.607</td>\n",
       "      <td>0.284</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.209</td>\n",
       "      <td>0.197</td>\n",
       "      <td>0.449</td>\n",
       "      <td>0.339</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   temp  atemp   hum  windspeed\n",
       "0 0.355  0.374 0.829      0.285\n",
       "1 0.379  0.361 0.716      0.466\n",
       "2 0.171  0.145 0.450      0.466\n",
       "3 0.176  0.175 0.607      0.284\n",
       "4 0.209  0.197 0.449      0.339"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对数据进行归一化\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "mn_x = MinMaxScaler()\n",
    "numerical_features = ['temp','atemp','hum','windspeed']\n",
    "temp = mn_x.fit_transform(data[numerical_features])\n",
    "\n",
    "t_data2 = pd.DataFrame(data=temp, columns=numerical_features, index =data.index)\n",
    "t_data2.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "合并数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>season_1</th>\n",
       "      <th>season_2</th>\n",
       "      <th>season_3</th>\n",
       "      <th>season_4</th>\n",
       "      <th>mnth_1</th>\n",
       "      <th>mnth_2</th>\n",
       "      <th>mnth_3</th>\n",
       "      <th>mnth_4</th>\n",
       "      <th>mnth_5</th>\n",
       "      <th>mnth_6</th>\n",
       "      <th>...</th>\n",
       "      <th>weekday_6</th>\n",
       "      <th>workingday_0</th>\n",
       "      <th>workingday_1</th>\n",
       "      <th>weathersit_1</th>\n",
       "      <th>weathersit_2</th>\n",
       "      <th>weathersit_3</th>\n",
       "      <th>temp</th>\n",
       "      <th>atemp</th>\n",
       "      <th>hum</th>\n",
       "      <th>windspeed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.355</td>\n",
       "      <td>0.374</td>\n",
       "      <td>0.829</td>\n",
       "      <td>0.285</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.379</td>\n",
       "      <td>0.361</td>\n",
       "      <td>0.716</td>\n",
       "      <td>0.466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.171</td>\n",
       "      <td>0.145</td>\n",
       "      <td>0.450</td>\n",
       "      <td>0.466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.176</td>\n",
       "      <td>0.175</td>\n",
       "      <td>0.607</td>\n",
       "      <td>0.284</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.209</td>\n",
       "      <td>0.197</td>\n",
       "      <td>0.449</td>\n",
       "      <td>0.339</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 34 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   season_1  season_2  season_3  season_4  mnth_1  mnth_2  mnth_3  mnth_4  \\\n",
       "0         1         0         0         0       1       0       0       0   \n",
       "1         1         0         0         0       1       0       0       0   \n",
       "2         1         0         0         0       1       0       0       0   \n",
       "3         1         0         0         0       1       0       0       0   \n",
       "4         1         0         0         0       1       0       0       0   \n",
       "\n",
       "   mnth_5  mnth_6    ...      weekday_6  workingday_0  workingday_1  \\\n",
       "0       0       0    ...              1             1             0   \n",
       "1       0       0    ...              0             1             0   \n",
       "2       0       0    ...              0             0             1   \n",
       "3       0       0    ...              0             0             1   \n",
       "4       0       0    ...              0             0             1   \n",
       "\n",
       "   weathersit_1  weathersit_2  weathersit_3  temp  atemp   hum  windspeed  \n",
       "0             0             1             0 0.355  0.374 0.829      0.285  \n",
       "1             0             1             0 0.379  0.361 0.716      0.466  \n",
       "2             1             0             0 0.171  0.145 0.450      0.466  \n",
       "3             1             0             0 0.176  0.175 0.607      0.284  \n",
       "4             1             0             0 0.209  0.197 0.449      0.339  \n",
       "\n",
       "[5 rows x 34 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t_data3=pd.concat([t_data1,t_data2],axis = 1,ignore_index=False)\n",
    "t_data3.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instant</th>\n",
       "      <th>season_1</th>\n",
       "      <th>season_2</th>\n",
       "      <th>season_3</th>\n",
       "      <th>season_4</th>\n",
       "      <th>mnth_1</th>\n",
       "      <th>mnth_2</th>\n",
       "      <th>mnth_3</th>\n",
       "      <th>mnth_4</th>\n",
       "      <th>mnth_5</th>\n",
       "      <th>...</th>\n",
       "      <th>workingday_1</th>\n",
       "      <th>weathersit_1</th>\n",
       "      <th>weathersit_2</th>\n",
       "      <th>weathersit_3</th>\n",
       "      <th>temp</th>\n",
       "      <th>atemp</th>\n",
       "      <th>hum</th>\n",
       "      <th>windspeed</th>\n",
       "      <th>yr</th>\n",
       "      <th>cnt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.355</td>\n",
       "      <td>0.374</td>\n",
       "      <td>0.829</td>\n",
       "      <td>0.285</td>\n",
       "      <td>0</td>\n",
       "      <td>985</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.379</td>\n",
       "      <td>0.361</td>\n",
       "      <td>0.716</td>\n",
       "      <td>0.466</td>\n",
       "      <td>0</td>\n",
       "      <td>801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.171</td>\n",
       "      <td>0.145</td>\n",
       "      <td>0.450</td>\n",
       "      <td>0.466</td>\n",
       "      <td>0</td>\n",
       "      <td>1349</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.176</td>\n",
       "      <td>0.175</td>\n",
       "      <td>0.607</td>\n",
       "      <td>0.284</td>\n",
       "      <td>0</td>\n",
       "      <td>1562</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.209</td>\n",
       "      <td>0.197</td>\n",
       "      <td>0.449</td>\n",
       "      <td>0.339</td>\n",
       "      <td>0</td>\n",
       "      <td>1600</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 37 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   instant  season_1  season_2  season_3  season_4  mnth_1  mnth_2  mnth_3  \\\n",
       "0        1         1         0         0         0       1       0       0   \n",
       "1        2         1         0         0         0       1       0       0   \n",
       "2        3         1         0         0         0       1       0       0   \n",
       "3        4         1         0         0         0       1       0       0   \n",
       "4        5         1         0         0         0       1       0       0   \n",
       "\n",
       "   mnth_4  mnth_5  ...   workingday_1  weathersit_1  weathersit_2  \\\n",
       "0       0       0  ...              0             0             1   \n",
       "1       0       0  ...              0             0             1   \n",
       "2       0       0  ...              1             1             0   \n",
       "3       0       0  ...              1             1             0   \n",
       "4       0       0  ...              1             1             0   \n",
       "\n",
       "   weathersit_3  temp  atemp   hum  windspeed  yr   cnt  \n",
       "0             0 0.355  0.374 0.829      0.285   0   985  \n",
       "1             0 0.379  0.361 0.716      0.466   0   801  \n",
       "2             0 0.171  0.145 0.450      0.466   0  1349  \n",
       "3             0 0.176  0.175 0.607      0.284   0  1562  \n",
       "4             0 0.209  0.197 0.449      0.339   0  1600  \n",
       "\n",
       "[5 rows x 37 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FE_data=pd.concat([data['instant'],t_data3,data['yr'],data['cnt']],axis=1)\n",
    "##FE_data.to_csv('FE_data.csv',index=False)\n",
    "FE_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 731 entries, 0 to 730\n",
      "Data columns (total 37 columns):\n",
      "instant         731 non-null int64\n",
      "season_1        731 non-null uint8\n",
      "season_2        731 non-null uint8\n",
      "season_3        731 non-null uint8\n",
      "season_4        731 non-null uint8\n",
      "mnth_1          731 non-null uint8\n",
      "mnth_2          731 non-null uint8\n",
      "mnth_3          731 non-null uint8\n",
      "mnth_4          731 non-null uint8\n",
      "mnth_5          731 non-null uint8\n",
      "mnth_6          731 non-null uint8\n",
      "mnth_7          731 non-null uint8\n",
      "mnth_8          731 non-null uint8\n",
      "mnth_9          731 non-null uint8\n",
      "mnth_10         731 non-null uint8\n",
      "mnth_11         731 non-null uint8\n",
      "mnth_12         731 non-null uint8\n",
      "holiday_0       731 non-null uint8\n",
      "holiday_1       731 non-null uint8\n",
      "weekday_0       731 non-null uint8\n",
      "weekday_1       731 non-null uint8\n",
      "weekday_2       731 non-null uint8\n",
      "weekday_3       731 non-null uint8\n",
      "weekday_4       731 non-null uint8\n",
      "weekday_5       731 non-null uint8\n",
      "weekday_6       731 non-null uint8\n",
      "workingday_0    731 non-null uint8\n",
      "workingday_1    731 non-null uint8\n",
      "weathersit_1    731 non-null uint8\n",
      "weathersit_2    731 non-null uint8\n",
      "weathersit_3    731 non-null uint8\n",
      "temp            731 non-null float64\n",
      "atemp           731 non-null float64\n",
      "hum             731 non-null float64\n",
      "windspeed       731 non-null float64\n",
      "yr              731 non-null int64\n",
      "cnt             731 non-null int64\n",
      "dtypes: float64(4), int64(3), uint8(30)\n",
      "memory usage: 61.5 KB\n"
     ]
    }
   ],
   "source": [
    "FE_data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instant</th>\n",
       "      <th>season_1</th>\n",
       "      <th>season_2</th>\n",
       "      <th>season_3</th>\n",
       "      <th>season_4</th>\n",
       "      <th>mnth_1</th>\n",
       "      <th>mnth_2</th>\n",
       "      <th>mnth_3</th>\n",
       "      <th>mnth_4</th>\n",
       "      <th>mnth_5</th>\n",
       "      <th>...</th>\n",
       "      <th>workingday_1</th>\n",
       "      <th>weathersit_1</th>\n",
       "      <th>weathersit_2</th>\n",
       "      <th>weathersit_3</th>\n",
       "      <th>temp</th>\n",
       "      <th>atemp</th>\n",
       "      <th>hum</th>\n",
       "      <th>windspeed</th>\n",
       "      <th>yr</th>\n",
       "      <th>cnt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.355</td>\n",
       "      <td>0.374</td>\n",
       "      <td>0.829</td>\n",
       "      <td>0.285</td>\n",
       "      <td>0</td>\n",
       "      <td>985</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.379</td>\n",
       "      <td>0.361</td>\n",
       "      <td>0.716</td>\n",
       "      <td>0.466</td>\n",
       "      <td>0</td>\n",
       "      <td>801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.171</td>\n",
       "      <td>0.145</td>\n",
       "      <td>0.450</td>\n",
       "      <td>0.466</td>\n",
       "      <td>0</td>\n",
       "      <td>1349</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.176</td>\n",
       "      <td>0.175</td>\n",
       "      <td>0.607</td>\n",
       "      <td>0.284</td>\n",
       "      <td>0</td>\n",
       "      <td>1562</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.209</td>\n",
       "      <td>0.197</td>\n",
       "      <td>0.449</td>\n",
       "      <td>0.339</td>\n",
       "      <td>0</td>\n",
       "      <td>1600</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 37 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   instant  season_1  season_2  season_3  season_4  mnth_1  mnth_2  mnth_3  \\\n",
       "0        1         1         0         0         0       1       0       0   \n",
       "1        2         1         0         0         0       1       0       0   \n",
       "2        3         1         0         0         0       1       0       0   \n",
       "3        4         1         0         0         0       1       0       0   \n",
       "4        5         1         0         0         0       1       0       0   \n",
       "\n",
       "   mnth_4  mnth_5  ...   workingday_1  weathersit_1  weathersit_2  \\\n",
       "0       0       0  ...              0             0             1   \n",
       "1       0       0  ...              0             0             1   \n",
       "2       0       0  ...              1             1             0   \n",
       "3       0       0  ...              1             1             0   \n",
       "4       0       0  ...              1             1             0   \n",
       "\n",
       "   weathersit_3  temp  atemp   hum  windspeed  yr   cnt  \n",
       "0             0 0.355  0.374 0.829      0.285   0   985  \n",
       "1             0 0.379  0.361 0.716      0.466   0   801  \n",
       "2             0 0.171  0.145 0.450      0.466   0  1349  \n",
       "3             0 0.176  0.175 0.607      0.284   0  1562  \n",
       "4             0 0.209  0.197 0.449      0.339   0  1600  \n",
       "\n",
       "[5 rows x 37 columns]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FE_data_train=FE_data[FE_data.yr==0]\n",
    "FE_data_train.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 365 entries, 0 to 364\n",
      "Data columns (total 37 columns):\n",
      "instant         365 non-null int64\n",
      "season_1        365 non-null uint8\n",
      "season_2        365 non-null uint8\n",
      "season_3        365 non-null uint8\n",
      "season_4        365 non-null uint8\n",
      "mnth_1          365 non-null uint8\n",
      "mnth_2          365 non-null uint8\n",
      "mnth_3          365 non-null uint8\n",
      "mnth_4          365 non-null uint8\n",
      "mnth_5          365 non-null uint8\n",
      "mnth_6          365 non-null uint8\n",
      "mnth_7          365 non-null uint8\n",
      "mnth_8          365 non-null uint8\n",
      "mnth_9          365 non-null uint8\n",
      "mnth_10         365 non-null uint8\n",
      "mnth_11         365 non-null uint8\n",
      "mnth_12         365 non-null uint8\n",
      "holiday_0       365 non-null uint8\n",
      "holiday_1       365 non-null uint8\n",
      "weekday_0       365 non-null uint8\n",
      "weekday_1       365 non-null uint8\n",
      "weekday_2       365 non-null uint8\n",
      "weekday_3       365 non-null uint8\n",
      "weekday_4       365 non-null uint8\n",
      "weekday_5       365 non-null uint8\n",
      "weekday_6       365 non-null uint8\n",
      "workingday_0    365 non-null uint8\n",
      "workingday_1    365 non-null uint8\n",
      "weathersit_1    365 non-null uint8\n",
      "weathersit_2    365 non-null uint8\n",
      "weathersit_3    365 non-null uint8\n",
      "temp            365 non-null float64\n",
      "atemp           365 non-null float64\n",
      "hum             365 non-null float64\n",
      "windspeed       365 non-null float64\n",
      "yr              365 non-null int64\n",
      "cnt             365 non-null int64\n",
      "dtypes: float64(4), int64(3), uint8(30)\n",
      "memory usage: 33.5 KB\n"
     ]
    }
   ],
   "source": [
    "FE_data_train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instant</th>\n",
       "      <th>season_1</th>\n",
       "      <th>season_2</th>\n",
       "      <th>season_3</th>\n",
       "      <th>season_4</th>\n",
       "      <th>mnth_1</th>\n",
       "      <th>mnth_2</th>\n",
       "      <th>mnth_3</th>\n",
       "      <th>mnth_4</th>\n",
       "      <th>mnth_5</th>\n",
       "      <th>...</th>\n",
       "      <th>workingday_1</th>\n",
       "      <th>weathersit_1</th>\n",
       "      <th>weathersit_2</th>\n",
       "      <th>weathersit_3</th>\n",
       "      <th>temp</th>\n",
       "      <th>atemp</th>\n",
       "      <th>hum</th>\n",
       "      <th>windspeed</th>\n",
       "      <th>yr</th>\n",
       "      <th>cnt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>365</th>\n",
       "      <td>366</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.387</td>\n",
       "      <td>0.389</td>\n",
       "      <td>0.712</td>\n",
       "      <td>0.350</td>\n",
       "      <td>1</td>\n",
       "      <td>2294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>366</th>\n",
       "      <td>367</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.267</td>\n",
       "      <td>0.227</td>\n",
       "      <td>0.392</td>\n",
       "      <td>0.633</td>\n",
       "      <td>1</td>\n",
       "      <td>1951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>367</th>\n",
       "      <td>368</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.113</td>\n",
       "      <td>0.062</td>\n",
       "      <td>0.454</td>\n",
       "      <td>0.708</td>\n",
       "      <td>1</td>\n",
       "      <td>2236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>368</th>\n",
       "      <td>369</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.060</td>\n",
       "      <td>0.053</td>\n",
       "      <td>0.426</td>\n",
       "      <td>0.335</td>\n",
       "      <td>1</td>\n",
       "      <td>2368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>369</th>\n",
       "      <td>370</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.258</td>\n",
       "      <td>0.262</td>\n",
       "      <td>0.539</td>\n",
       "      <td>0.222</td>\n",
       "      <td>1</td>\n",
       "      <td>3272</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 37 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     instant  season_1  season_2  season_3  season_4  mnth_1  mnth_2  mnth_3  \\\n",
       "365      366         1         0         0         0       1       0       0   \n",
       "366      367         1         0         0         0       1       0       0   \n",
       "367      368         1         0         0         0       1       0       0   \n",
       "368      369         1         0         0         0       1       0       0   \n",
       "369      370         1         0         0         0       1       0       0   \n",
       "\n",
       "     mnth_4  mnth_5  ...   workingday_1  weathersit_1  weathersit_2  \\\n",
       "365       0       0  ...              0             1             0   \n",
       "366       0       0  ...              0             1             0   \n",
       "367       0       0  ...              1             1             0   \n",
       "368       0       0  ...              1             0             1   \n",
       "369       0       0  ...              1             1             0   \n",
       "\n",
       "     weathersit_3  temp  atemp   hum  windspeed  yr   cnt  \n",
       "365             0 0.387  0.389 0.712      0.350   1  2294  \n",
       "366             0 0.267  0.227 0.392      0.633   1  1951  \n",
       "367             0 0.113  0.062 0.454      0.708   1  2236  \n",
       "368             0 0.060  0.053 0.426      0.335   1  2368  \n",
       "369             0 0.258  0.262 0.539      0.222   1  3272  \n",
       "\n",
       "[5 rows x 37 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FE_data_test=FE_data[FE_data.yr==1]\n",
    "FE_data_test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 366 entries, 365 to 730\n",
      "Data columns (total 37 columns):\n",
      "instant         366 non-null int64\n",
      "season_1        366 non-null uint8\n",
      "season_2        366 non-null uint8\n",
      "season_3        366 non-null uint8\n",
      "season_4        366 non-null uint8\n",
      "mnth_1          366 non-null uint8\n",
      "mnth_2          366 non-null uint8\n",
      "mnth_3          366 non-null uint8\n",
      "mnth_4          366 non-null uint8\n",
      "mnth_5          366 non-null uint8\n",
      "mnth_6          366 non-null uint8\n",
      "mnth_7          366 non-null uint8\n",
      "mnth_8          366 non-null uint8\n",
      "mnth_9          366 non-null uint8\n",
      "mnth_10         366 non-null uint8\n",
      "mnth_11         366 non-null uint8\n",
      "mnth_12         366 non-null uint8\n",
      "holiday_0       366 non-null uint8\n",
      "holiday_1       366 non-null uint8\n",
      "weekday_0       366 non-null uint8\n",
      "weekday_1       366 non-null uint8\n",
      "weekday_2       366 non-null uint8\n",
      "weekday_3       366 non-null uint8\n",
      "weekday_4       366 non-null uint8\n",
      "weekday_5       366 non-null uint8\n",
      "weekday_6       366 non-null uint8\n",
      "workingday_0    366 non-null uint8\n",
      "workingday_1    366 non-null uint8\n",
      "weathersit_1    366 non-null uint8\n",
      "weathersit_2    366 non-null uint8\n",
      "weathersit_3    366 non-null uint8\n",
      "temp            366 non-null float64\n",
      "atemp           366 non-null float64\n",
      "hum             366 non-null float64\n",
      "windspeed       366 non-null float64\n",
      "yr              366 non-null int64\n",
      "cnt             366 non-null int64\n",
      "dtypes: float64(4), int64(3), uint8(30)\n",
      "memory usage: 33.6 KB\n"
     ]
    }
   ],
   "source": [
    "FE_data_test.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "FE_data_train.to_csv('FE_data_train.csv',index=False)\n",
    "FE_data_test.to_csv('FE_data_test.csv',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
